home *** CD-ROM | disk | FTP | other *** search
- /*
- 224/290 03 Jun 90 22:09:20
- From: Ratko Tomic
- To: All
- Subj: SIEVE 5X8 + PICTURE
- Attr:
- ------------------------------------------------
- */
-
- /********** PART 1/3: Macros, Globals and count_all() ******************/
-
- /************************************************************************
- SIEVE5x8 (Primes to 1.8 Million in < 60K)
- Author: Ratko V. Tomic
- *************************************************************************
- This program computes primes using Sieve with bits used for flags and
- by removing multiples of 2,3 and 5 ahead of time. This allows computation
- of primes up to 30*(Number of bytes available). Thus even on a PC with
- 520K array (in huge model) you can compute primes up to 15.97 Million,
- good for factoring of numbers up 2.55e14.
- *************************************************************************/
- #include <stdio.h>
-
- unsigned _stack = 1024;
-
- typedef unsigned char byte;
- typedef unsigned word;
-
- #define S 60000 /* S*30 = 1,800,000 primes, Small model & 4K stack */
- /* To increase the limit S, reduce stack size. */
-
- #define M (S*8) /* Maximum bits available */
- #define T (S*30) /* Upper limit for #'s */
- byte a[S+1]; /* Bit map holding S*30 primes, must be all 0's */
- /* If iterated, re-initialization is NOT needed. */
-
- byte x[8]={1,7,11,13,17,19,23,29}; /* Numbers left from 1-30 */
- byte y[8]={12,18,21,27,30,36,47,49}; /* Used to calculate steps */
-
- #define M6(x) (((x)+(x)+(x))<<1) /* Multiply by 6 */
- #define L3(x) ((unsigned long)(x)<<1<<1<<1) /* Shift left by 3 */
- #define R3(x) ((unsigned long)(x)>>1>>1>>1)
- #define R2(x) ((unsigned long)(x)>>1>>1)
-
- #define cvstep(zz,z,bz) z=((word)R3(zz)), bz=((word)zz&7)
- #define set(j,bj) a[j]|=1<<bj
- #define adv(j,bj,k,bk) j+=k; bj+=bk; if (bj>7) j++, bj-=8
-
- #define bn(n) (n&15?(n&3?(n&1?0:1):(n&4?2:3)):(n&48?(n&16?4:5):(n&64?6:7)))
-
- /*** Count primes after sieve5x8() has finished, called from main() ***/
-
- long count_all(void) /* Compute total number of primes found */
- { int c; /* which is: 3 + Num_of_0_bits_in_a[] */
- word w,i;
- long t=3; /* Account for primes 2,3,5 */
- for(i=0; i<S/2; i++) /* Process a[] in word chunks */
- { /* Byte order doesn't matter */
- w=~*((word*)a+i); /* There are less 0 bits -> use ~ */
- for (c=0; w; ++c) w&=w-1; /* Count bits set to 1 in w */
- t+=c; /* Accumm. total 0 bits from a[] */
- }
- return t;
- }
-
-
- /*** PART 2/3: sieve5x8() and utility gen_up2() ****/
-
- sieve5x8() /** COMPUTE PRIMES < 30*S **/
- { long P,n,d;
- register word i,bi;
- word j,bm,sav_i,sav_bi;
- static long ss[8]; /* 8 Steps used to cross out multiples of P */
- static word s[8],b[8]; /* Steps in byte offsets and bit shifts */
- for (i=0,a[i--]=1;;)
- {
- do i++; while(a[i]==0xFF); /* Find next 0 bit in a[] */
- if (i>=S) return; /* End of array reached */
- bi=~a[i]; sav_i=i;
- bm=-bi&bi; /* Bit mask=lowest bit set in bi */
- n=L3(i)+(bi=bn(bi)); /* Get index n for prime found */
- do { /* Process bits in their own loop */
- d=(n-1)&~7L; /* == ((n-1)/8)*8; */
- ss[0]=ss[6]=M6(d)+y[(bi-1)&7];/* == ((n-1)/8)*48+y[(n-1)%8]; */
- if ((d=ss[0]+n)>M) return; /* Done - no more flags to be set */
- P=(long)i*30+x[bi];; /* Compute prime P from the index */
- ss[7]=n+n+1; /* Compute remaining 7 steps */
- ss[3]=(R2(n+4)&~1L)+P; /* == ((n+4)/8)*2+P; */
- ss[2]=ss[4]=i+((1+P)>>1); /* == (n/8)+(1+P)/2; */
- ss[1]=ss[5]=R2(n+2)+P; /* == (n+2)/4+P; */
- for (j=3;j<8;j++) /* Convert steps to byte step s[] */
- cvstep(ss[j],s[j],b[j]); /* and bit step b[] */
- s[0]=s[6];s[1]=s[5];s[2]=s[4];/* Clone remaining 3 byte offsets */
- b[0]=b[6];b[1]=b[5];b[2]=b[4];/* same for bit steps */
- sav_bi=bi; j=i; i=0;
- do { /* Tag mult of P in 8 steps ss[j] */
- adv(j,bi,s[i],b[i]); /* Advance to next postion in a[] */
- set(j,bi); /* Set bit for that position */
- ++i; i&=7; /* Advance to next step (i%8) */
- }
- while((d+=ss[i])<M);
- i=sav_i; bi=sav_bi;
- do n++, bi++, bm<<=1;
- while(bi<8 && a[i]&bm); /* Get next 0 bit in this byte */
- }
- while(bi<8);
- }
- }
-
- long gen_up2(long h) /*** Generate primes<=h, where: 6 < h < T ) ***/
- { word i=0,k=1; /* This func. is not called for benchmark */
- long c=3,n,b=0; /* It assumes sieve5x8() was already called */
- byte m=2;
- while(1)
- {
- n=b+x[k++]; /* n=Next number to check */
- if (n>h) return(c);
- if (!(a[i]&m)) c++; /* Prime found (its value is n) */
- if (!(m+=m)) /* Advance to next bit */
- {
- m=1; i++; k=0; b+=30;
- if (i>=S) return(c);
- }
- }
- }
-
- /** PART 3/3: main(), ITER=5 **/
-
- /****** PC Time calculations ******/
-
- #define pc_time (*(long far*)0x46C) /* Get IBM tick count */
- time_diff(long t,int *s,int *hs) /* Compute seconds & 1/100's */
- { long dt;
- if ((dt=pc_time-t)<0) dt+=0x1800B0L; /* Adjust for midnight */
- dt*=1000; *s=(int)(dt/18207);
- *hs=(int)(dt%18207)/182;
- if (*hs>100) *hs=99;
- }
-
- /*** TEST SHELL ***/
-
- #define ITER 5 /* Iterations for benchmark (on 4.77 PC set to 1) */
-
- #define d3(n) (int)((long)(n)/1000000L) /* Millions */
- #define d2(n) (int)(((long)(n)%1000000L)/1000) /* Thousands */
- #define d1(n) (int)((long)(n)%1000) /* Ones */
- long primes; /* Total primes found */
-
- main()
- { long t0;
- int i,sec,s100;
- printf("Computing primes up to %d,%03d,%03d... ",d3(T),d2(T),d1(T));
- t0=pc_time; /* Timer ticks used for accuracy */
- for (i=0; i<ITER; i++)
- sieve5x8(), /* Construct primes < T=1,800,000 */
- primes=count_all(); /* Count number of primes found */
- time_diff(t0,&sec,&s100); /* Result should be 135,072 primes */
- printf("Found %d,%03d primes.\n",d2(primes),d1(primes));
- printf("TIME: %d.%02d seconds.\n",sec,s100);
- }
- /*
- ---------------------- END OF C CODE -----------------------------------
-
- This is new version for the benchmark I posted recently. The main new
- addon is the new algorithm which eliminates primes 2,3,5 (instead of
- 2,3 only). This boosts the packing density from 3 to 3.75 numbers/bit,
- or from 24 to 30 numbers per byte. The new algorithm is in the sieve5x8().
- Some macros were rewritten and some code improvements were done, as result
- it takes roughly same time as sieve3x8() per iteration, even though it's
- more complex and generates more primes (1,800,000 for 60,000 byte array).
- For more accuracy (on a 386/25Mhz), 5 ITERATIONS were used:
-
- Compiler TIME (sec) CODE (bytes) EXE SIZE
- --------------------------------------------------------------
- TopSpeed 2.02 27.02 1105 8197 (winner again)
- Watcom 7.0 34.16 1164 6950
- MSC 5.0 35.09 1266 8827
- Turbo C 2.0 35.75 1246 8056 (last again)
- --------------------------------------------------------------
-
- THE BIG PICTURE - If you have VGA or MCGA video adapter try this: Take
- the content of a[S] word at a time (2 bytes) and use it as an offset
- into video RAM, (video mode 0x13: 320x200, 256 colors, flat 2-D address).
- At that offset increment a byte to new (brighter) color. Do that for all
- 30,000 words in a[]. Quite surprising ?! It also works with sieve3x8()
- and other video cards. Also, the byte ordering in a word doesn't matter.
-
- --- TBBS v2.1/NM
- * Origin: MSI S/W BBS-Fram. MA-(508) 875-8009-CodeRunneR-HandiWARE (322/327)
- */
-
-